from datetime import datetime
from time import sleep
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup


def get_food_safety_news(url):
    """Fetch today's news entries from a listing page.

    Scrapes every ``div.list_item`` element on *url*, keeps only entries
    whose ``span.time`` text equals today's date (``YYYY-MM-DD``), and
    collects their title and absolute link.

    Args:
        url: Absolute URL of the news listing page.

    Returns:
        list[list[str]]: ``[title, link]`` pairs for today's news, or an
        empty list if the request fails or nothing matches.
    """
    # Present a browser User-Agent: some sites reject the default
    # python-requests identity. (The original built this dict but never
    # passed it to requests.get.)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/58.0.3029.110 Safari/537.3'}
    try:
        # timeout prevents the script from hanging forever on a dead host.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        # Use the detected encoding so Chinese text decodes correctly.
        response.encoding = response.apparent_encoding
        soup = BeautifulSoup(response.text, 'html.parser')

        news_data = []
        today = datetime.now().strftime('%Y-%m-%d')
        # One div.list_item per news entry; only a single HTTP request is
        # made, so no per-item sleep is needed (the old sleep(5) in this
        # loop only slowed the parse down).
        for item in soup.find_all('div', class_='list_item'):
            date_elem = item.find('span', class_='time')
            # Keep only items published today.
            if not date_elem or date_elem.get_text().strip() != today:
                continue
            a_tag = item.find('a')
            if not a_tag:
                continue
            title = a_tag.get_text().strip()
            link = a_tag.get('href')
            if link:
                # Resolve relative hrefs against the listing page URL;
                # urljoin also handles rooted and scheme-relative paths
                # that the old rsplit-based join got wrong.
                link = urljoin(url, link)
            news_data.append([title, link])

        return news_data
    except requests.RequestException as e:
        print(f"请求出错: {e}")
    except Exception as e:
        print(f"发生未知错误: {e}")
    return []


if __name__ == "__main__":
    # Listing page to scrape for today's food-safety news.
    target_url = "https://www.cnki.net//news/list-144-1.html"
    todays_items = get_food_safety_news(target_url)

    # Nothing scraped (or an error already printed) -> report and stop.
    if not todays_items:
        print("未获取到今日的新闻数据。")
    else:
        # Persist the [title, link] pairs to an Excel sheet.
        frame = pd.DataFrame(todays_items, columns=['标题', '链接'])
        frame.to_excel('food_safety_news.xlsx', index=False)
        print("新闻数据已保存到 food_safety_news.xlsx")
